This is analysis on Stimulated Monocytes - LPS,IFNb and Basal samples
(Stim cohort). There are 52 samples in each category, in total of 156
samples.
This markdown is divided by two parts ( LPS and IFNb ), followed by
Limma Gene Differential, DES and DEG comparision, and DES and DEG
Pathway comparison.
Genes that have mean TPM > 1 were selected to be analysed
(filtering by tpm value, and using that to filter the count
matrix)
Both Basal and Stimulated samples are distributed across two
batches
# No run
load("/Users/hyominseo/Desktop/RAJ/Gene_Differential/Stim/Rdata/gene_matrix.RData")
# Metadata for Stim
meta<-read_tsv("/Users/hyominseo/Desktop/RAJ/Stim_Limma/meta.tsv")
tpm<-as.data.frame(genes_tpm)
tpm<-tpm%>%dplyr::filter(rowMeans(.)>1)%>%
dplyr::mutate(SD=rowSds(as.matrix(.))) %>%
dplyr::filter(SD > 0) %>%
dplyr::select(.,-SD)
tpm<-as.data.frame(t(tpm))
tpm<-tpm%>%dplyr::filter (rownames(.) %in% meta$sample)
tpm<-as.data.frame(t(tpm))
count<-genes_counts%>%
dplyr::filter(rownames(.) %in% rownames(tpm))
count<-as.data.frame(t(count))
count<-count%>%dplyr::filter (rownames(.) %in% meta$sample)
count<-as.data.frame(t(count))
filepath<-file.path("/Users/hyominseo/Desktop/RAJ/Gene_Differential/Stim/Rdata")
save(count,tpm,meta, file = file.path(filepath, "Nav_156_gene_data.RData"))
#Making all Gene Name dataframe
all_gene<-as.data.frame(rownames(count))%>%
dplyr::rename("GENEID" = "rownames(count)")
gencode<-read_tsv("gencode.v30.tx2gene.tsv")%>%
distinct(GENEID, .keep_all=TRUE)%>%dplyr::select(-c("TXNAME"))
all_gene_name<-merge(all_gene,gencode, by = "GENEID")%>%dplyr::select(GENENAME)
filepath<-file.path("/Users/hyominseo/Desktop/RAJ/Gene_Differential/Stim")
write.table(all_gene_name, file = file.path(filepath,"Stimulated_All_Gene_Names.tsv"),
row.names = F, sep = "\t", quote = F)## [1] "Genes that have tpm>1 in basal monocytes for PD and Control sample: 58929 -> 12961"
reference: https://ucdavis-bioinformatics-training.github.io/2018-June-RNA-Seq-Workshop/thursday/DE.html
reference:
text=limma%20is%20an%20R%20package,analyses%20of%20RNA%2DSeq%20data.
Makes LPS_Basal.tsv and IFNb_Basal.tsv which is the entire Limma gene
differential toptable for all 12961 genes
Stim Limma functions have two models, contrast between ‘condition’ VS
Basal.
The filtered gene count matrix is normalized using
calcNormfactros.
Covariate ‘Batch’ encompasses all technical variables
all_cov <- list(
batch<-as.factor(meta$Batch),
disease<-as.factor(meta$Disease),
condition<-as.factor(meta$Condition),
sex<-as.factor(meta$Sex),
age<-meta$Age
)
gencode<-read_tsv("gencode.v30.tx2gene.tsv")%>%
distinct(GENEID, .keep_all=TRUE)%>%dplyr::select(-c("TXNAME"))
count_norm<-calcNormFactors(count, method="TMM")
model<-model.matrix(~0 + condition+batch+sex+age)
dge<-DGEList(counts=count, samples = meta, norm.factors= count_norm)
v <- voom(dge, model)
vfit<-lmFit(v,model)
diff<-function(fit_in, contr_in,file_name){
if(contr_in == "contr_IFNb"){contr<-makeContrasts(IFNb =(conditionIFNb - conditionBasal),
levels = colnames(coef(fit_in)))
}
if(contr_in == "contr_LPS"){contr<-makeContrasts(LPS =(conditionLPS - conditionBasal),
levels = colnames(coef(fit_in)))
}
vfit_in<-contrasts.fit(fit_in, contr)
efit<-eBayes(vfit_in)
print(summary(decideTests(efit, p.value = "0.05")))
DE_genes<-topTable(efit, sort.by = "p",n=Inf, coef = 1)
DE_genes$GENEID<-rownames(DE_genes)
DE_genes <- DE_genes[,c("GENEID", names(DE_genes)[1:6])]
DE_genes_ID<-merge(DE_genes,gencode, by = "GENEID")
filepath<-file.path("/Users/hyominseo/Desktop/RAJ/Gene_Differential/Stim/TopTable")
write.table(DE_genes_ID, file = file.path(filepath,file_name),
row.names = F, sep = "\t", quote = F)
}
diff(vfit, "contr_IFNb","IFNb_Basal.tsv")
diff(vfit, "contr_LPS","LPS_Basal.tsv")Contrast<-c("IFNb vs Basal", "LPS vs Basal")
Model_Formula<-c( "condition + batch + sex + age","condition + batch + sex + age")
Condition_Sample<-c(as.integer(length(grep("IFNb",meta$Condition))),as.integer(length(grep("LPS",meta$Condition))))
Control_Sample<-c(as.integer(length(grep("Basal",meta$Condition))),as.integer(length(grep("Basal",meta$Condition))))
df<-data.frame(Contrast, Model_Formula, Condition_Sample, Control_Sample)
dfvol_plot<-function(table_in, point_size, text_size,title_text_size,plot_title,label_size){
t<-read_tsv(table_in)
t$DE_genes<-case_when(
t$adj.P.Val < 0.05 & abs(t$logFC) > 1 ~ "TRUE",
t$adj.P.Val > 0.05 & abs(t$logFC) < 1 ~ "FALSE"
)
# Adding UP and Down regulated gene count
t$DE_direction<-case_when(
t$DE_genes == "TRUE" & t$logFC > 0.00 ~ "UP",
t$DE_genes == "TRUE" & t$logFC < 0.00 ~ "DOWN"
)
highlight_df<- t%>%drop_na(DE_genes)
highlight_df<-highlight_df%>%dplyr::filter(!grepl("FALSE", DE_genes))
highlight_df<-highlight_df%>%dplyr::filter(grepl("ADAR.*|APOBEC.*", GENENAME))
vol_plot<- ggplot(t, aes(x=logFC, y=-log10(P.Value), col = DE_direction))+
geom_point(size =point_size, alpha=0.7)+
theme_bw()+
scale_color_manual(values = wes_palette(n=3,name="GrandBudapest1"))+
geom_vline(xintercept=c(-1,1), col ="black",linetype="longdash")+
labs(title=plot_title)+
theme(plot.title = element_text(size=title_text_size, face="bold", hjust=0.5))+
theme(text = element_text(size = text_size)) +
theme(legend.position='none') +
geom_label_repel(data = highlight_df, aes(label = GENENAME),
box.padding = 0, point.padding = 0, force = 80,
segment.size = 0.2,segment.color = "black",
angle = 180,point.size =3,color="black",
nudge_x = 0.5,hjust =0,size = label_size)+
geom_point(data = highlight_df, size =point_size, color = "black")
vol_plot
}
sig_genes<-function(toptable){
df<-read_tsv(toptable)
df$DE_genes<-case_when(
# defining sig
df$adj.P.Val < 0.05 & abs(df$logFC) > 1 ~ "TRUE",
df$adj.P.Val > 0.05 & abs(df$logFC) < 1 ~ "FALSE"
)
df<-df%>%dplyr::filter(!grepl("FALSE", DE_genes))
df<-df%>%drop_na(DE_genes)
df$DE_direction<-case_when(
df$logFC > 0.00 ~ "UP",
df$logFC < 0.00 ~ "DOWN"
)
df
}
lps_DEG<-sig_genes("Stim/TopTable/LPS_Basal.tsv")
filepath<-file.path("/Users/hyominseo/Desktop/RAJ/Gene_Differential/Stim/TopTable")
write.table(lps_DEG, file = file.path(filepath,"LPS_Basal_DEG.tsv"),
row.names = F, sep = "\t", quote = F)
ifnb_DEG<-sig_genes("Stim/TopTable/IFNb_Basal.tsv")
filepath<-file.path("/Users/hyominseo/Desktop/RAJ/Gene_Differential/Stim/TopTable")
write.table(ifnb_DEG, file = file.path(filepath,"IFNb_Basal_DEG.tsv"),
row.names = F, sep = "\t", quote = F)text_size =10
title_text_size = 12
point_size=2
legend_size = 0.8
legend_text_size = 15
label_size = 4
lps_vol_plot<-vol_plot("Stim/TopTable/LPS_Basal.tsv",point_size, text_size, title_text_size, "LPS Stimulated Monocytes",label_size)
lps_vol_plot<-ggarrange(lps_vol_plot)
ggsave(plot=lps_vol_plot,filename="Stim/Figures/figure1:lps_vol_plot.jpg",width = 10, height = 5,dpi = 600)LPS DEGenes Volcano Plot
lps_DEG<- read_tsv("Stim/TopTable/LPS_Basal_DEG.tsv")
DE_UP_Count<-c( as.integer(length(which(lps_DEG$DE_direction == "UP"))))
DE_DOWN_Count<-c( as.integer(length(which(lps_DEG$DE_direction == "DOWN"))))
Model<-c("LPS_Basal")
Model_Formula<-c("condition + batch + sex + age")
df<-data.frame( Model, Model_Formula, DE_UP_Count, DE_DOWN_Count)
dflps_DEG_a<-lps_DEG%>%dplyr::filter(grepl("ADAR.*|APOBEC.*", GENENAME))
lps_DEG_a<-lps_DEG_a%>%dplyr::select(-c(AveExpr, t, B,DE_genes))%>%arrange(desc(logFC))
knitr::kable(lps_DEG_a, "simple", caption = "A~ genes in LPS_DEG", table.attr = "style='width:100%;'")| GENEID | logFC | P.Value | adj.P.Val | GENENAME | DE_direction |
|---|---|---|---|---|---|
| ENSG00000128383.13 | 1.827449 | 0 | 3e-07 | APOBEC3A | UP |
| ENSG00000197381.16 | 1.170770 | 0 | 0e+00 | ADARB1 | UP |
Pathways of DEGenes and DESites (annotated by genename,and with relazed threshold) are analysed. Gprofiler uses custom domain, which is the vector of all gene names found in samples ( the subjects to the DEG analysis)
# abs(log_fc) > 1 dplyr::filter(score1>45 & score2>45)
LPS_Basal_DEG<- read_tsv("Stim/TopTable/LPS_Basal_DEG.tsv")
LPS_Basal_DES<- read_tsv("Stim/TopTable/LPS_Basal_DE_Sites.tsv")
LPS_Basal_DES$DE_direction<-case_when(
LPS_Basal_DES$logFC > 0.00 ~ "UP",
LPS_Basal_DES$logFC < 0.00 ~ "DOWN"
)
DE_UP_Count<-c( as.integer(length(which(LPS_Basal_DEG$DE_direction == "UP"))),
as.integer(length(which(LPS_Basal_DES$DE_direction == "UP"))))
DE_DOWN_Count<-c( as.integer(length(which(LPS_Basal_DEG$DE_direction == "DOWN"))),
as.integer(length(which(LPS_Basal_DES$DE_direction == "DOWN"))))
Model<-c("LPS_Basal_DEGenes","LPS_Basal_DESites")
Model_Formula<-c("condition + disease + batch + sex + age","condition + disease + batch + sex + age")
Threshold<-c("(adj.P.Val<= 0.05), logFC>1", "(adj.P.Val<= 0.05)")
df<-data.frame( Model, Model_Formula, Threshold,DE_UP_Count, DE_DOWN_Count)
dfLPS_Basal_DEG_gp<-gprofiler("Stim/TopTable/LPS_Basal_DEG.tsv")## [1] "Number of UP regulated genes: 747 Number of DOWN regulated genes: 311"
LPS_Basal_DEG_gp_plot<-gostplot(LPS_Basal_DEG_gp, capped = TRUE, interactive = TRUE)
LPS_Basal_DEG_gp_plot#LPS_Basal_DEG_gp <- LPS_Basal_DEG_gp$result%>%arrange(desc(precision))
#filepath<-"/Users/hyominseo/Desktop/RAJ/Gene_Differential/Stim/gp_data"
#write_tsv(LPS_Basal_DEG_gp, file
# =file.path(filepath,"Custom_LPS_Basal_DEG_Path.tsv"))LPS_Basal_DEG_gp<-read_tsv("Stim/gp_data/Custom_LPS_Basal_DEG_Path.tsv")
LPS_Basal_DEG_gp<-process_path(LPS_Basal_DEG_gp)
down<-LPS_Basal_DEG_gp%>%dplyr::filter(grepl("down-regulated",query))
down_go<-down%>%dplyr::filter(grepl("GO:BP",source))
down_kegg<-down%>%dplyr::filter(grepl("KEGG",source))
up<-LPS_Basal_DEG_gp%>%dplyr::filter(grepl("up-regulated",query))
up_go<-up%>%dplyr::filter(grepl("GO:BP",source))
up_kegg<-up%>%dplyr::filter(grepl("KEGG",source))
name<-c("down_go","down_kegg","up_go","up_kegg")
count<-c(nrow(down_go), nrow(down_kegg), nrow(up_go), nrow(up_kegg))
mean<-c((mean(down_go$log_P_value)), (mean(down_kegg$log_P_value)), (mean(up_go$log_P_value)), (mean(up_kegg$log_P_value)))
df<-data.frame(name, count, mean)
knitr::kable(df, format = "simple", table.attr = "style='width:80%;'",caption = "LPS DEG Pathway",col.names=c('Direction_Source', "Count","Mean")) | Direction_Source | Count | Mean |
|---|---|---|
| down_go | 430 | 11.08565 |
| down_kegg | 59 | 10.96950 |
| up_go | 976 | 15.43278 |
| up_kegg | 66 | 11.93939 |
## [1] "Number of UP regulated genes: 872 Number of DOWN regulated genes: 205"
| Direction_Source | Count | Mean |
|---|---|---|
| down_go | 361 | 10.430323 |
| down_kegg | 63 | 9.445332 |
| up_go | 1125 | 16.643681 |
| up_kegg | 84 | 13.563554 |
DEG_path<-read_tsv("Stim/gp_data/Custom_LPS_Basal_DEG_Path.tsv")
DES_path<-read_tsv("Stim/gp_data/Custom_LPS_Basal_DESites_Path.tsv")
DEG_path_match<-DEG_path%>%dplyr::filter(term_name %in% DES_path$term_name)
DES_path_match<-DES_path%>%dplyr::filter(term_name %in% DEG_path$term_name)
# 2161
DEG_path<-process_path(DEG_path_match)%>%dplyr::rename("logP_DEG" = "log_P_value")
# 2062
DES_path<-process_path(DES_path_match)%>%dplyr::rename("logP_DES" = "log_P_value")
# inner_join DEGene and DESite pathway by term id
DEG_DES_path_go<-inner_join(DEG_path, DES_path, by = "term_id")%>%
dplyr::filter(!grepl("TF|CORUM", source.x))
#filepath<-"/Users/hyominseo/Desktop/RAJ/Gene_Differential/Stim/TopTable"
#write_tsv(DEG_DES_path_go, file = file.path(filepath, "LPS_DEG_DES_path_match.tsv"))
Path<-(c("DEG_path","DES_path","DEG_DES_inner_join"))
Count<-(c(as.integer(nrow(DEG_path)), as.integer(nrow(DES_path)),as.integer(nrow(DEG_DES_path_go))))
Unique_id<-(c(as.integer(length(unique(DEG_path$term_id))),
as.integer(length(unique(DES_path$term_id))),
as.integer(length(unique(DEG_DES_path_go$term_id)))))
df<-data_frame(Path, Count, Unique_id)
writeLines("td, th { padding : 6px } th { background-color : blue ; color : white; border : 1px solid white; } td { color : blue ; border : 1px solid blue }", con = "mystyle.css")
knitr::kable(df, "simple", caption = "DEGenes_DESites_Shared_Gene_pathway", table.attr = "style='width:100%;'",col.names=c("Path", "Count","Unique ID"))| Path | Count | Unique ID |
|---|---|---|
| DEG_path | 1459 | 1052 |
| DES_path | 1383 | 1051 |
| DEG_DES_inner_join | 2086 | 1051 |
knitr::kable(table(DEG_DES_path_go$source.x), "simple", caption = "_Shared_Gene_pathway_by_Source", table.attr = "style='width:100%;'",col.names=c("GO: source", "Count"))| GO: source | Count |
|---|---|
| GO:BP | 1908 |
| KEGG | 178 |
path_pval_plot<-function(table_in,plot_title,point_size,text_size,title_text_size,inplot_text_size){
t<-read_tsv(table_in)
print(colnames(t))
plot<-ggplot(data= t, aes(x = t$logP_DEG, y = t$logP_DES))+
geom_point(aes(color=source.x),alpha=0.8,size = point_size)+
stat_cor(size = inplot_text_size,
label.x.npc = "left",
label.y.npc = "top")+
geom_smooth(method = "lm", se = FALSE, colour="black")+
theme_bw()+
facet_wrap(facets =~source.x, scales ='free')+
theme(text = element_text(size =text_size))+
labs(title = plot_title)+
theme(plot.title = element_text(size=title_text_size, face="bold", hjust=0.5),
axis.title.x = element_text(size =text_size),
axis.title.y = element_text(size =text_size))+
xlab("logP_DEGene")+ylab("logP_DESite")+
theme(legend.position ='none')
plot
}load("/Users/hyominseo/Desktop/RAJ/Gene_Differential/Stim/Rdata/Nav_156_gene_data.RData")
load("/Users/hyominseo/Desktop/RAJ/Stim_Limma/Rdata/nav_data_1.RData")
tpm_limk<-tpm%>%dplyr::filter(grepl("ENSG00000182541", rownames(.)))
tpm_limk<-as.data.frame(t(tpm_limk))
tpm_limk<-tpm_limk%>%dplyr::rename("Expression_TPM"="ENSG00000182541.18")
tpm_limk<-tpm_limk%>%mutate(Expression_TPM = log2(Expression_TPM+1))
tpm_limk<-tpm_limk%>%rownames_to_column("Sample")
editing_limk<-editing%>%dplyr::filter(grepl("chr22:31277021:T:C",rownames(.)))
editing_limk<-editing_limk%>%dplyr::select(-c("editing_index"))
editing_limk<-as.data.frame(t(editing_limk))
editing_limk<-editing_limk%>%dplyr::rename("Editing_Rate"="chr22:31277021:T:C")
editing_limk<-editing_limk%>%rownames_to_column("Sample")
editing_limk<-editing_limk%>%dplyr::filter( Sample %in% tpm_limk$Sample)
limk_merge <- merge(tpm_limk, editing_limk,by = "Sample")
limk_merge$Condition<-str_split_fixed(limk_merge$Sample, "-", 3)[,3]
limk_merge<-limk_merge%>%column_to_rownames("Sample")text_size =18
title_text_size = 20
point_size=5
legend_size = 1
legend_text_size = 15
label_size = 6
inplot_text_size =8
lps_vol_plot<-vol_plot("Stim/TopTable/LPS_Basal.tsv",point_size, text_size, title_text_size, "Differentially Expressed Genes",label_size)
lps_path_pval_plot<-path_pval_plot("Stim/TopTable/LPS_DEG_DES_path_match.tsv","DES and DEG Shared Pathways",point_size,text_size,title_text_size,inplot_text_size)
lps_1_plot<-ggarrange(lps_vol_plot,lps_path_pval_plot,
labels =c("A","B"),
font.label=list(color="black",size= text_size),
ncol=2)
lps_1_plot<-annotate_figure(lps_1_plot,
top=text_grob("LPS-Stimulated Monocytes",
color = "black", face = "bold", size =title_text_size))
lps_mut_logfc_plot<-mut_logfc_plot("Stim/TopTable/LPS_DEG_DES_match.tsv",point_size,text_size, "Mutation",legend_size,legend_text_size, label_size)
lps_loc_logfc_plot<-loc_logfc_plot("Stim/TopTable/LPS_DEG_DES_match.tsv",point_size,text_size,
"Location", legend_size,legend_text_size, label_size,inplot_text_size)
lps_2_plot<-ggarrange(lps_mut_logfc_plot, lps_loc_logfc_plot,
labels =c("C","D"),
font.label=list(color="black",size= text_size),
ncol=2)
lps_2_plot<-annotate_figure(lps_2_plot,
top=text_grob("DES and DEG Shared Genes Annotation",
color = "black", face = "bold", size =title_text_size))
tpm_rate_plot<-
ggplot(limk_merge,aes(x=Expression_TPM, y=Editing_Rate))+
theme_bw() +
geom_jitter(aes(color=Condition),alpha=0.8, size = 6)+
scale_color_manual(values = wes_palette(n=3,name="GrandBudapest1"))+
stat_cor(size = inplot_text_size,
label.x =6,
label.y.npc = "top")+
labs(x = "log(Expression TPM)", y = "RNA Editing Rate")+
theme(axis.text.x=element_text(size=text_size),
axis.text.y=element_text(size=text_size))+
theme(text = element_text(size =text_size))+
labs(title="Gene LIMK2 TPM vs Editing Rate")+
theme(plot.title = element_text(size=title_text_size, face="bold", hjust=0.5))+
theme(legend.key.size = unit(2,'cm'),
legend.title = element_text(color = "black", size =20),
legend.text = element_text(color = "black", size = 20))+
guides(color = guide_legend(override.aes = list(size = 10)))
tpm_rate_plot<-ggarrange(tpm_rate_plot,
labels =c("E"),
font.label=list(color="black",size= text_size))
tpm_rate_plot<-annotate_figure(tpm_rate_plot,
top=text_grob("DES and DEG Shared Non-SNV",
color = "black", face = "bold", size =title_text_size))
lps_plot<-ggarrange(lps_1_plot,lps_2_plot,tpm_rate_plot,
nrow=3)
ggsave(plot=lps_plot,filename="Stim/Figures/figure2:lps_all_plot.jpg",width = 20, height = 15,dpi = 600)All plots for LPS Samples
LPS_Basal_DE_Sites<-read_tsv("Stim/TopTable/LPS_1_toptable.tsv")%>%
dplyr::filter(adj.P.Val <= 0.05)
LPS_Basal_DE_Sites_Gene<-read_tsv("Stim/TopTable/LPS_Basal_DE_Sites.tsv")
LPS_Basal_DEG<-read_tsv("Stim/TopTable/LPS_Basal_DEG.tsv")
Category<-c("DE_Sites, Site count", "DE_Sites_Gencode_Annotated, Gene count", "DE_Genes, Gene count")
Threshold<-c("adj.P < 0.05","adj.P < 0.05","adj.P.Val < 0.05 & abs(logFC) > 1")
Count<-c(as.integer(nrow(LPS_Basal_DE_Sites)), as.integer(nrow(LPS_Basal_DE_Sites_Gene)),
as.integer(nrow(LPS_Basal_DEG)))
Unique_Gene<- c("All Sites Unique",as.integer(length(unique(LPS_Basal_DE_Sites_Gene$GENENAME))),
as.integer(length(unique(LPS_Basal_DEG$GENENAME))))
df<-data.frame(Threshold, Category, Count, Unique_Gene)
knitr::kable(df, "simple", caption = "LPS: All Count", table.attr = "style='width:100%;'")| Threshold | Category | Count | Unique_Gene |
|---|---|---|---|
| adj.P < 0.05 | DE_Sites, Site count | 1077 | All Sites Unique |
| adj.P < 0.05 | DE_Sites_Gencode_Annotated, Gene count | 1077 | 387 |
| adj.P.Val < 0.05 & abs(logFC) > 1 | DE_Genes, Gene count | 1058 | 1056 |
text_size =10
title_text_size = 12
point_size=2
legend_size = 0.8
legend_text_size = 15
label_size = 4
ifnb_vol_plot<-vol_plot("Stim/TopTable/IFNb_Basal.tsv",point_size, text_size, title_text_size, "IFNb Stimulated Monocytes",label_size)
ifnb_vol_plot<-ggarrange(ifnb_vol_plot,
labels=c("A"),
font.label=list(color="black",size= text_size))
ggsave(plot=ifnb_vol_plot,filename="Stim/Figures/figure3:ifnb_vol_plot.jpg",width = 10, height = 5,dpi = 600)knitr::include_graphics("Stim/Figures/figure3:ifnb_vol_plot.jpg")IFNb DEGenes volcano plot
ifnb_DEG<- read_tsv("Stim/TopTable/IFNb_Basal_DEG.tsv")
DE_UP_Count<-c( as.integer(length(which(ifnb_DEG$DE_direction == "UP"))))
DE_DOWN_Count<-c( as.integer(length(which(ifnb_DEG$DE_direction == "DOWN"))))
Model<-c("IFNb_Basal")
Model_Formula<-c("condition + batch + sex + age")
df<-data.frame( Model, Model_Formula, DE_UP_Count, DE_DOWN_Count)
dfifnb_DEG_a<-ifnb_DEG%>%dplyr::filter(grepl("ADAR.*|APOBEC.*", GENENAME))
ifnb_DEG_a<-ifnb_DEG_a%>%dplyr::select(-c(AveExpr, t, B,DE_genes))%>%arrange(desc(logFC))
knitr::kable(ifnb_DEG_a, "simple", caption = "A~ genes IFNb_DEG", table.attr = "style='width:100%;'")| GENEID | logFC | P.Value | adj.P.Val | GENENAME | DE_direction |
|---|---|---|---|---|---|
| ENSG00000128383.13 | 6.966449 | 0 | 0 | APOBEC3A | UP |
| ENSG00000179750.16 | 4.969790 | 0 | 0 | APOBEC3B | UP |
| ENSG00000100298.15 | 3.328001 | 0 | 0 | APOBEC3H | UP |
| ENSG00000128394.17 | 3.158258 | 0 | 0 | APOBEC3F | UP |
| ENSG00000239713.9 | 3.132144 | 0 | 0 | APOBEC3G | UP |
| ENSG00000243811.10 | 2.435091 | 0 | 0 | APOBEC3D | UP |
| ENSG00000160710.16 | 1.793903 | 0 | 0 | ADAR | UP |
| ENSG00000244509.4 | 1.475170 | 0 | 0 | APOBEC3C | UP |
| ENSG00000197381.16 | 1.111013 | 0 | 0 | ADARB1 | UP |
Pathways of DEGenes and DESites (annotated by genename,and with relazed threshold) are analysed.
# abs(log_fc) > 1 dplyr::filter(score1>45 & score2>45)
IFNb_Basal_DEG<- read_tsv("Stim/TopTable/IFNb_Basal_DEG.tsv")
IFNb_Basal_DES<- read_tsv("Stim/TopTable/IFNb_Basal_DE_Sites.tsv")
IFNb_Basal_DES$DE_direction<-case_when(
IFNb_Basal_DES$logFC > 0.00 ~ "UP",
IFNb_Basal_DES$logFC < 0.00 ~ "DOWN"
)
DE_UP_Count<-c( as.integer(length(which(IFNb_Basal_DEG$DE_direction == "UP"))),
as.integer(length(which(IFNb_Basal_DES$DE_direction == "UP"))))
DE_DOWN_Count<-c( as.integer(length(which(IFNb_Basal_DEG$DE_direction == "DOWN"))),
as.integer(length(which(IFNb_Basal_DES$DE_direction == "DOWN"))))
Model<-c("IFNb_Basal_DEGenes","IFNb_Basal_DESites")
Model_Formula<-c("condition + disease + batch + sex + age","condition + disease + batch + sex + age")
Threshold<-c("(adj.P.Val<= 0.05), logFC>1", "(adj.P.Val<= 0.05)")
df<-data.frame( Model, Model_Formula, Threshold,DE_UP_Count, DE_DOWN_Count)
df## [1] "Number of UP regulated genes: 1688 Number of DOWN regulated genes: 1569"
| Direction_Source | Count | Mean |
|---|---|---|
| down_go | 1216 | 18.74414 |
| down_kegg | 88 | 11.87891 |
| up_go | 1263 | 19.28232 |
| up_kegg | 91 | 11.16183 |
## [1] "Number of UP regulated genes: 3209 Number of DOWN regulated genes: 586"
| Direction_Source | Count | Mean |
|---|---|---|
| down_go | 789 | 14.89881 |
| down_kegg | 68 | 12.94640 |
| up_go | 1806 | 26.53062 |
| up_kegg | 111 | 12.44709 |
There exist a statically significant relation between logP of DEGenes pathway and DESites pathway for sources GO Biological Process and KEGG
DEG_path<-read_tsv("Stim/gp_data/Custom_IFNb_Basal_DEG_Path.tsv")
DES_path<-read_tsv("Stim/gp_data/Custom_IFNb_Basal_DESites_Path.tsv")
DEG_path_match<-DEG_path%>%dplyr::filter(term_name %in% DES_path$term_name)
DES_path_match<-DES_path%>%dplyr::filter(term_name %in% DEG_path$term_name)
# 3800
DEG_path<-process_path(DEG_path_match)%>%dplyr::rename("logP_DEG" = "log_P_value")
# 2932
DES_path<-process_path(DES_path_match)%>%dplyr::rename("logP_DES" = "log_P_value")
# inner_join DEGene and DESite pathway by term id
DEG_DES_path_go<-inner_join(DEG_path, DES_path, by = "term_id")%>%
# filter MIRNA and TF because there are only 2 each
dplyr::filter(!grepl("MIRNA|TF|CORUM|HP", source.x))
#filepath<-"/Users/hyominseo/Desktop/RAJ/Gene_Differential/Stim/TopTable"
#write_tsv(DEG_DES_path_go, file = file.path(filepath, "IFNb_DEG_DES_path_match.tsv"))
Path<-(c("DEG_path","DES_path","DEG_DES_inner_join"))
Count<-(c(as.integer(nrow(DEG_path)), as.integer(nrow(DES_path)),as.integer(nrow(DEG_DES_path_go))))
Unique_id<-(c(as.integer(length(unique(DEG_path$term_id))),
as.integer(length(unique(DES_path$term_id))),
as.integer(length(unique(DEG_DES_path_go$term_id)))))
df<-data_frame(Path, Count, Unique_id)
writeLines("td, th { padding : 6px } th { background-color : blue ; color : white; border : 1px solid white; } td { color : blue ; border : 1px solid blue }", con = "mystyle.css")
knitr::kable(df, "simple", caption = "DEGenes_DESites_pathway", table.attr = "style='width:100%;'",col.names=c("Path", "Count","Unique ID"))| Path | Count | Unique ID |
|---|---|---|
| DEG_path | 2471 | 1293 |
| DES_path | 1939 | 1297 |
| DEG_DES_inner_join | 3730 | 1293 |
knitr::kable(table(DEG_DES_path_go$source.x), "simple", caption = "IFNb : DEGenes_DESites_mathed pathway", table.attr = "style='width:100%;'",col.names=c("GO: source", "Count"))| GO: source | Count |
|---|---|
| GO:BP | 3505 |
| KEGG | 225 |
load("/Users/hyominseo/Desktop/RAJ/Gene_Differential/Stim/Rdata/Nav_156_gene_data.RData")
# APOBEC3F in IFNB DEG : ENSG00000128394.17
tpm_3f<-tpm%>%dplyr::filter(grepl("ENSG00000128394", rownames(.)))
tpm_3f<-as.data.frame(t(tpm_3f))
tpm_3f<-tpm_3f%>%dplyr::rename("Expression_TPM"="ENSG00000128394.17")
tpm_3f<-tpm_3f%>%rownames_to_column("Sample")
tpm_3f<-tpm_3f%>%mutate(Expression_TPM = log2(Expression_TPM+1))
editing_3f_stop<-editing%>%dplyr::filter(grepl("chr22:39052279:G:A",rownames(.)))
editing_3f_stop<-editing_3f_stop%>%dplyr::select(-c("editing_index"))
editing_3f_stop<-as.data.frame(t(editing_3f_stop))
editing_3f_stop<-editing_3f_stop%>%dplyr::rename("Editing_Rate"="chr22:39052279:G:A")
editing_3f_stop<-editing_3f_stop%>%rownames_to_column("Sample")
editing_3f_stop<-editing_3f_stop%>%dplyr::filter( Sample %in% tpm_3f$Sample)
apobec3f_merge_stop <- merge(tpm_3f, editing_3f_stop,by = "Sample")
apobec3f_merge_stop$Condition<-str_split_fixed(apobec3f_merge_stop$Sample, "-", 3)[,3]
# APOBEC3B in IFNB DEG : ENSG00000179750.16
tpm_3b<-tpm%>%dplyr::filter(grepl("ENSG00000179750", rownames(.)))
tpm_3b<-as.data.frame(t(tpm_3b))
tpm_3b<-tpm_3b%>%dplyr::rename("Expression_TPM"="ENSG00000179750.16")
tpm_3b<-tpm_3b%>%mutate(Expression_TPM = log2(Expression_TPM+1))
tpm_3b<-tpm_3b%>%rownames_to_column("Sample")
editing_3b_non<-editing%>%dplyr::filter(grepl("chr22:38989603:G:A",rownames(.)))
editing_3b_non<-editing_3b_non%>%dplyr::select(-c("editing_index"))
editing_3b_non<-as.data.frame(t(editing_3b_non))
editing_3b_non<-editing_3b_non%>%dplyr::rename("Editing_Rate"="chr22:38989603:G:A")
editing_3b_non<-editing_3b_non%>%rownames_to_column("Sample")
editing_3b_non<-editing_3b_non%>%dplyr::filter( Sample %in% tpm_3b$Sample)
apobec3b_merge_non <- merge(tpm_3b, editing_3b_non,by = "Sample")
apobec3b_merge_non$Condition<-str_split_fixed(apobec3b_merge_non$Sample, "-", 3)[,3]
batch<-meta%>%dplyr::select(sample, Batch)
batch<-batch%>%dplyr::rename("Sample"="sample")
apobec3b_merge_non<-merge(apobec3b_merge_non, batch, by ="Sample")text_size =18
title_text_size = 20
point_size=5
legend_size = 1
legend_text_size = 15
label_size = 5.5
inplot_text_size =7.5
ifnb_vol_plot<-vol_plot("Stim/TopTable/IFNb_Basal.tsv",point_size, text_size, title_text_size, "Differentially Expressed Genes",label_size)
ifnb_path_pval_plot<-path_pval_plot("Stim/TopTable/IFNb_DEG_DES_path_match.tsv","DES and DEG Shared Pathways",point_size,text_size,title_text_size,inplot_text_size)
ifnb_1_plot<-ggarrange(ifnb_vol_plot,ifnb_path_pval_plot,
labels =c("A","B"),
font.label=list(color="black",size= text_size),
ncol=2)
IFNb_DEG_DES<-read_tsv("Stim/TopTable/IFNb_DEG_DES_match.tsv")
ifnb_non<-IFNb_DEG_DES%>%dplyr::filter(grepl("Nonsynonymous|Stopgain", Mutation))
ifnb_non<-ifnb_non%>%dplyr::filter(grepl("APOBEC*", GENENAME_DES))
ifnb_mut_logfc_plot<-mut_logfc_plot("Stim/TopTable/IFNb_DEG_DES_match.tsv",point_size,text_size, "Mutation (w/o Noncoding)", legend_size,legend_text_size,label_size, ifnb_non)
ifnb_loc_logfc_plot<-loc_logfc_plot("Stim/TopTable/IFNb_DEG_DES_match.tsv",point_size,text_size, "Location",legend_size,legend_text_size,label_size,inplot_text_size,ifnb_non)
ifnb_2_plot<-ggarrange(ifnb_mut_logfc_plot, ifnb_loc_logfc_plot,
labels =c("C","D"),
font.label=list(color="black",size= text_size),
ncol=2)
ifnb_2_plot<-annotate_figure(ifnb_2_plot,
top=text_grob("DES and DEG Shared Genes Annotation",
color = "black", face = "bold", size
=title_text_size))
tpm_rate_plot_3f<-
ggplot(apobec3f_merge_stop,aes(x=Expression_TPM, y=Editing_Rate))+
theme_bw() +
geom_jitter(aes(color=Condition),alpha=0.8, size = 6)+
scale_color_manual(values = wes_palette(n=3,name="GrandBudapest1"))+
stat_cor(size = inplot_text_size,
label.x =4.3,
label.y.npc = "middle")+
labs(x = "log(Expression TPM)", y = "RNA Editing Rate")+
theme(axis.text.x=element_text(size=text_size),
axis.text.y=element_text(size=text_size))+
theme(text = element_text(size =text_size))+
labs(title="APOBEC3F Stopgain: TPM vs Editing Rate")+
theme(plot.title = element_text(size=title_text_size, face="bold", hjust=0.5))+
theme(legend.key.size = unit(legend_size,'cm'),
legend.title = element_text(color = "black", size =legend_text_size),
legend.text = element_text(color = "black", size = legend_text_size))+
guides(color = guide_legend(override.aes = list(size = 7)))
tpm_rate_plot_3b<-
ggplot(apobec3b_merge_non,aes(x=Expression_TPM, y=Editing_Rate, shape=Batch))+
theme_bw() +
geom_jitter(aes(color=Condition),alpha=0.8, size = 6)+
scale_color_manual(values = wes_palette(n=3,name="GrandBudapest1"))+
stat_cor(size = inplot_text_size,
label.x=6,
label.y.npc = "middle")+
labs(x = "log(Expression TPM)", y = "RNA Editing Rate")+
theme(axis.text.x=element_text(size=text_size),
axis.text.y=element_text(size=text_size))+
theme(text = element_text(size =text_size))+
labs(title="APOBEC3B Nonsyn SNV: TPM vs Editing Rate")+
theme(plot.title = element_text(size=title_text_size, face="bold", hjust=0.5))+
theme(legend.key.size = unit(legend_size,'cm'),
legend.title = element_text(color = "black", size =legend_text_size),
legend.text = element_text(color = "black", size = legend_text_size))+
guides(color = guide_legend(override.aes = list(size = 7)))
ifnb_3_plot<-ggarrange(tpm_rate_plot_3b, tpm_rate_plot_3f,
labels =c("E","F"),
font.label=list(color="black",size= text_size),
ncol=2)
ifnb_plot<-ggarrange(ifnb_1_plot,ifnb_2_plot,ifnb_3_plot, nrow=3)
ifnb_plot<-annotate_figure(ifnb_plot, top=text_grob("IFNb-Stimualted Monocytes",
color = "black", face = "bold", size =title_text_size))
ggsave(plot=ifnb_plot,filename="Stim/Figures/figure4:ifnb_all_plot.jpg",width = 20, height = 15,
dpi = 600)All plots for IFNb samples
IFNb_Basal_DE_Sites<-read_tsv("Stim/TopTable/IFNb_1_toptable.tsv")%>%
dplyr::filter(adj.P.Val <= 0.05)
IFNb_Basal_DE_Sites_Gene<-read_tsv("Stim/TopTable/IFNb_Basal_DE_Sites.tsv")
IFNb_Basal_DEG<-read_tsv("Stim/TopTable/IFNb_Basal_DEG.tsv")
Category<-c("DE_Sites, Site count", "DE_Sites_Gencode_Annotated, Gene count", "DE_Genes, Gene count")
Threshold<-c("adj.P < 0.05","adj.P < 0.05","adj.P.Val < 0.05 & abs(logFC) > 1")
Count<-c(as.integer(nrow(IFNb_Basal_DE_Sites)), as.integer(nrow(IFNb_Basal_DE_Sites_Gene)),
as.integer(nrow(IFNb_Basal_DEG)))
Unique_Gene<- c("All Sites Unique",as.integer(length(unique(IFNb_Basal_DE_Sites_Gene$GENENAME))),
as.integer(length(unique(IFNb_Basal_DEG$GENENAME))))
df<-data.frame(Threshold, Category, Count, Unique_Gene)
knitr::kable(df, "simple", caption = "IFNb: All Count", table.attr = "style='width:100%;'")| Threshold | Category | Count | Unique_Gene |
|---|---|---|---|
| adj.P < 0.05 | DE_Sites, Site count | 3795 | All Sites Unique |
| adj.P < 0.05 | DE_Sites_Gencode_Annotated, Gene count | 3795 | 1374 |
| adj.P.Val < 0.05 & abs(logFC) > 1 | DE_Genes, Gene count | 3257 | 3250 |
vol_plot<-function(t, point_size, text_size,title_text_size,plot_title,label_size){
vol_plot<- ggplot(t, aes(x=logFC, y=-log10(P.Value), col = DE_direction))+
geom_point(size =point_size, alpha=0.7)+
theme_bw()+
scale_color_manual(values = wes_palette(n=5,name="Rushmore1"))+
geom_vline(xintercept=c(-1,1), col ="black",linetype="longdash")+
labs(title=plot_title)+
theme(plot.title = element_text(size=title_text_size, face="bold", hjust=0.5))+
theme(text = element_text(size = text_size)) +
theme(legend.position='none')
vol_plot
}
process_deg<-function(table_in){
t<-read_tsv(table_in)
t$DE_genes<-case_when(
t$adj.P.Val < 0.05 & abs(t$logFC) > 1 ~ "TRUE",
t$adj.P.Val > 0.05 & abs(t$logFC) < 1 ~ "FALSE"
)
# Adding UP and Down regulated gene count
t$DE_direction<-case_when(
t$DE_genes == "TRUE" & t$logFC > 0.00 ~ "UP",
t$DE_genes == "TRUE" & t$logFC < 0.00 ~ "DOWN"
)
t
}text_size =12
title_text_size=12
legend_size =1.2
legend_text_size = 12
point_size =2.4
label_size = 3
lps_risk<-process_deg("Stim/TopTable/LPS_Basal.tsv")
highlight_df<-lps_risk%>%dplyr::filter(grepl("LRRK2|PLCG2|PILRA",GENENAME))
lps_vol_plot<-vol_plot(lps_risk,point_size, text_size, title_text_size, "LPS vs Basal",label_size)+
geom_label_repel(data = highlight_df, aes(label = GENENAME, col = GENENAME),
box.padding = 0, max.overlaps = 3,
point.padding = 0, force = 80,segment.size = 0.2,
point.size =point_size,fontface="bold",
nudge_x = -0.1,hjust =0,size = label_size)+
geom_point(data = highlight_df, aes(label = GENENAME, col = GENENAME),size=3)+
theme(legend.position = "none")
ifnb_risk<-process_deg("Stim/TopTable/IFNb_Basal.tsv")
highlight_df<-ifnb_risk%>%dplyr::filter(grepl("LRRK2|PLCG2|PILRA",GENENAME))
ifnb_vol_plot<-vol_plot(ifnb_risk,point_size, text_size, title_text_size, "IFNb vs Basal",label_size)+
geom_label_repel(data = highlight_df, aes(label = GENENAME, col = GENENAME),
box.padding = 0, max.overlaps = 3,
point.padding = 0, force = 80,segment.size = 0.2,
point.size =point_size,fontface="bold",
nudge_x = -0.1,hjust =0,size = label_size)+
geom_point(data = highlight_df, aes(label = GENENAME, col = GENENAME),size=3)+
theme(legend.position = "none")
AD_risk_stim<-ggarrange(lps_vol_plot,ifnb_vol_plot,
labels=c("E","F"),
font.label=list(color="black",size= text_size),
ncol=2, common.legend = TRUE, legend="top")
AD_risk_stim<-annotate_figure(AD_risk_stim,
top=text_grob("Risk Genes Found in Stim DEG",
color = "black", face = "bold", size =title_text_size))
ggsave(plot=AD_risk_stim,filename="Stim/Figures/figure5:PD_AD_risk_DEG.jpg",width = 10, height = 4,dpi = 600)PD AD Risk genes in Stim DEG